package com.jscloud.spark.pvuv

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Computes the UV (unique visitor) count of an access log.
 *
 * The first space-delimited token of every log line is treated as the visitor
 * key (the job stores it in `ipRdd`); the number of distinct keys is printed
 * to standard output.
 */
object UVCount {

  /** Access log read when no path is supplied on the command line. */
  private val DefaultInputPath: String =
    "file:///D:\\JSProjects\\jsCloud-bigdata-app\\sparkapp\\src\\main\\resources\\access.log"

  /**
   * Job entry point.
   *
   * @param args optional; `args(0)` is the path/URI of the access log to read.
   *             When absent, [[DefaultInputPath]] is used.
   */
  def main(args: Array[String]): Unit = {
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)

    // SparkContext is the program's entry point into Spark.
    // setIfMissing keeps "local[*]" as the default for IDE runs while letting a
    // master passed through spark-submit / system properties take precedence.
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("UVCount")
      .setIfMissing("spark.master", "local[*]")
    val sc: SparkContext = new SparkContext(sparkConf)

    try {
      sc.setLogLevel("WARN")

      val dataRdd: RDD[String] = sc.textFile(inputPath)

      // take(1) instead of (0): String.split drops trailing empty strings, so a
      // line containing only spaces yields an empty array and indexing it would
      // throw. Empty keys (blank lines, leading space) are discarded so they are
      // not counted as a visitor.
      val ipRdd: RDD[String] = dataRdd
        .flatMap(line => line.split(" ").take(1))
        .filter(_.nonEmpty)

      val uniqueVisitors: Long = ipRdd.distinct().count()

      println(uniqueVisitors)
    } finally {
      // Always release the context, even when the job fails part-way.
      sc.stop()
    }
  }
}
